This notebook contains the key visualizations for the Task Mapping
paper.
Very useful decision boundary plotting code from: https://mhahsler.github.io/Introduction_to_Data_Mining_R_Examples/book/classification-alternative-techniques.html#k-nearest-neighbors
decisionplot <- function(model, data, class_var,
predict_type = c("class", "prob"), resolution = 5 * 75) {
# resolution is set to 75 dpi if the image is rendered 5 inches wide.
y <- data %>% pull(class_var)
x <- data %>% dplyr::select(-all_of(class_var))
# resubstitution accuracy
prediction <- predict(model, x, type = predict_type[1])
# LDA returns a list
if(is.list(prediction)) prediction <- prediction$class
prediction <- factor(prediction, levels = levels(y))
cm <- confusionMatrix(data = prediction, reference = y)
acc <- cm$overall["Accuracy"]
# evaluate model on a grid
r <- sapply(x[, 1:2], range, na.rm = TRUE)
xs <- seq(r[1,1], r[2,1], length.out = resolution)
ys <- seq(r[1,2], r[2,2], length.out = resolution)
g <- cbind(rep(xs, each = resolution), rep(ys, time = resolution))
colnames(g) <- colnames(r)
g <- as_tibble(g)
### guess how to get class labels from predict
### (unfortunately not very consistent between models)
cl <- predict(model, g, type = predict_type[1])
# LDA returns a list
if(is.list(cl)) {
prob <- cl$posterior
cl <- cl$class
} else
try(prob <- predict(model, g, type = predict_type[2]))
# we visualize the difference in probability/score between the
# winning class and the second best class.
# don't use probability if predict for the classifier does not support it.
max_prob <- 1
try({
max_prob <- t(apply(prob, MARGIN = 1, sort, decreasing = TRUE))
max_prob <- max_prob[,1] - max_prob[,2]
}, silent = TRUE)
cl <- factor(cl, levels = levels(y))
g <- g %>% add_column(prediction = cl, probability = max_prob)
ggplot(g, mapping = aes_string(
x = colnames(g)[1],
y = colnames(g)[2])) +
geom_raster(mapping = aes(fill = prediction, alpha = probability)) +
geom_contour(mapping = aes(z = as.numeric(prediction)),
bins = length(levels(cl)), size = .5, color = "black") +
geom_point(data = data, mapping = aes_string(
x = colnames(data)[1],
y = colnames(data)[2],
shape = class_var), alpha = .7) +
scale_alpha_continuous(range = c(0,1), limits = c(0,1), guide = "none") +
labs(subtitle = paste("Training accuracy:", round(acc, 2)))
}
Load the Data
task_map <- read_csv('../task_map.csv')
Rows: 102 Columns: 25
-- Column specification ----------------------------------------------------------------------------------------
Delimiter: ","
chr (1): task
dbl (24): Q1concept_behav, Q3type_1_planning, Q4type_2_generate, Q6type_5_cc, Q7type_7_battle, Q8type_8_perf...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Plot the Task Map and other Related Images
Draw the task map using PCA & clustering
First, run the PCA
set.seed(1)
pca <- task_map %>% #select(-continuous_questions) %>%
select(-task) %>%
prcomp(center = T)
# get optimal number of clusters -- "silhouette" method
fviz_nbclust(x = pca$x, FUNcluster = stats::kmeans, method = "silhouette") +
labs(subtitle = "Silhouette method")

# get optimal number of clusters
NbClust(data = pca$x, distance = "euclidean",
min.nc = 2, max.nc = 15, method = "kmeans")
*** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.

*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
*******************************************************************
* Among all indices:
* 9 proposed 2 as the best number of clusters
* 3 proposed 3 as the best number of clusters
* 2 proposed 4 as the best number of clusters
* 5 proposed 10 as the best number of clusters
* 2 proposed 13 as the best number of clusters
* 1 proposed 14 as the best number of clusters
* 2 proposed 15 as the best number of clusters
***** Conclusion *****
* According to the majority rule, the best number of clusters is 2
*******************************************************************
$All.index
KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman Rubin Cindex DB
2 5.4470 61.4035 14.4335 0.9622 221.9375 164410.8392 48.5404 99.8470 6.7370 1.6140 0.4272 1.2541
3 0.9212 41.9264 15.6316 0.4562 397.8075 65962.2674 35.8229 87.2532 9.9607 1.8470 0.4592 1.7097
4 34.1211 37.1952 1.4591 1.7101 579.4047 19768.3559 25.6963 75.3550 15.5991 2.1386 0.4459 1.6280
5 0.1297 28.3839 4.4654 -0.1287 701.1324 9364.8251 25.6267 74.2495 20.5842 2.1705 0.4027 1.5794
6 0.2807 24.3914 12.9929 -0.8555 772.2602 6714.5189 23.1698 70.9819 19.9124 2.2704 0.3907 1.4941
7 5.0651 24.9798 3.4111 1.2775 931.4178 1919.7696 16.4934 62.5202 28.6105 2.5777 0.4335 1.5758
8 0.5516 22.4287 5.3286 0.0887 948.1492 2128.1093 15.0974 60.3532 28.9981 2.6702 0.4417 1.6543
9 0.2735 21.1760 19.9755 -0.1272 1097.9023 620.4146 13.6645 57.1155 37.1941 2.8216 0.4201 1.6851
10 6.9115 24.8159 3.5564 4.2723 1412.1301 35.1788 8.3850 47.0167 65.2931 3.4276 0.3675 1.4949
11 0.6885 23.2973 4.9020 3.6451 1501.7500 17.6801 8.4934 45.2669 71.7599 3.5601 0.3629 1.4095
12 1.7052 22.5157 3.1285 3.4910 1662.6670 4.3442 7.5335 42.9531 74.7842 3.7519 0.3648 1.2815
13 0.6085 21.3774 4.8517 2.8132 1725.7302 2.7474 7.0349 41.5101 77.8436 3.8823 0.3610 1.2427
14 1.0322 20.9439 4.8261 2.7635 1828.9448 1.1583 6.3322 39.3643 84.8649 4.0940 0.3264 1.3681
15 0.8228 20.6221 5.9833 2.8535 2033.4956 0.1790 6.2626 37.3177 89.8610 4.3185 0.3610 1.4006
Silhouette Duda Pseudot2 Beale Ratkowsky Ball Ptbiserial Frey McClain Dunn Hubert SDindex
2 0.3498 1.4321 -17.4995 -4.9369 0.0575 49.9235 0.6834 1.7824 0.5636 0.1982 0.0161 2.8083
3 0.2483 0.5136 28.4081 15.4698 0.0994 29.0844 0.5878 -0.0301 1.2183 0.2395 0.0164 3.3612
4 0.2538 3.6681 -25.4584 -10.7034 0.1083 18.8388 0.6390 1.5820 1.3200 0.2524 0.0197 3.0087
5 0.2249 1.6467 -5.1055 -5.5039 0.1270 14.8499 0.5845 0.2054 1.7697 0.2395 0.0175 3.1680
6 0.2210 4.8002 -30.0837 -11.4118 0.1315 11.8303 0.5888 1.3888 1.8554 0.2395 0.0174 2.9098
7 0.1657 1.0931 -1.5324 -1.3294 0.1362 8.9315 0.4876 -3.0782 3.1452 0.2207 0.0202 3.5619
8 0.1382 4.3481 -12.3202 -10.3596 0.1342 7.5441 0.4365 0.1626 3.9109 0.2223 0.0204 4.1061
9 0.1601 1.1020 -0.8329 -1.2969 0.1391 6.3462 0.4348 -0.1788 4.2569 0.2165 0.0209 3.7886
10 0.2342 2.6811 -22.5727 -9.8417 0.1416 4.7017 0.5062 -1.5738 3.6719 0.2190 0.0228 3.7292
11 0.2249 0.6907 4.0296 6.8452 0.1423 4.1152 0.4794 -0.1361 4.0758 0.1755 0.0232 3.5309
12 0.2415 1.0628 -0.2953 -0.7449 0.1468 3.5794 0.4938 0.3309 3.9643 0.1818 0.0232 3.4085
13 0.2437 0.8068 5.0290 3.8442 0.1461 3.1931 0.4908 0.2872 4.0520 0.1818 0.0235 3.5236
14 0.2070 2.3198 -3.9826 -6.3786 0.1451 2.8117 0.4649 1.0053 4.8952 0.1818 0.0247 3.4845
15 0.2032 1.1460 -2.2931 -1.9996 0.1477 2.4878 0.4335 0.2185 5.7441 0.2738 0.0251 3.9810
Dindex SDbw
2 0.9422 0.4544
3 0.8797 0.3788
4 0.8187 0.3184
5 0.8118 0.3386
6 0.7922 0.3131
7 0.7474 0.2751
8 0.7342 0.2539
9 0.7112 0.2516
10 0.6467 0.2004
11 0.6319 0.1808
12 0.6165 0.1643
13 0.6064 0.1555
14 0.5904 0.1560
15 0.5743 0.1588
$All.CriticalValues
CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
2 0.8262 12.2032 1
3 0.8221 6.4903 0
4 0.6708 17.1766 1
5 0.6303 7.6260 1
6 0.6524 20.2431 1
7 0.7390 6.3565 1
8 0.6028 10.5440 1
9 0.6303 5.2795 1
10 0.7465 12.2272 1
11 0.7113 3.6520 0
12 0.5674 3.8122 1
13 0.7841 5.7810 0
14 0.5195 6.4755 1
15 0.7465 6.1136 1
$Best.nc
KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman Rubin Cindex
Number_clusters 4.0000 2.0000 10.000 10.0000 10.0000 3.00 3.0000 4.0000 10.000 10.0000 14.0000
Value_Index 34.1211 61.4035 16.419 4.2723 314.2278 52254.66 12.7175 10.7927 28.099 -0.4735 0.3264
DB Silhouette Duda PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain Dunn
Number_clusters 13.0000 2.0000 2.0000 2.0000 2.0000 15.0000 3.0000 2.0000 2.0000 2.0000 15.0000
Value_Index 1.2427 0.3498 1.4321 -17.4995 -4.9369 0.1477 20.8391 0.6834 1.7824 0.5636 0.2738
Hubert SDindex Dindex SDbw
Number_clusters 0 2.0000 0 13.0000
Value_Index 0 2.8083 0 0.1555
$Best.partition
[1] 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2
[54] 2 1 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 1 1 1 2 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

kmeans_output <- pca$x %>%
kmeans(centers = 3, nstart = 100)
combined_data <- cbind(task_map,
pca$x, factor(kmeans_output$cluster)) %>%
rename(cluster = `factor(kmeans_output$cluster)`)
fviz_eig(pca)

Standard Task Map Image with All Labels
p <- combined_data %>%
ggplot(aes(
x = PC1,
y = PC2,
label = task,
fill = cluster
)) + geom_point() + geom_label(nudge_y = 0.1, size = 4) +
#+ , alpha=0.05) +
# highlights only the ones in the selected set
# geom_label(
# data = subset(combined_data, task %in% c("NASA Moon survival", "Desert survival")),
# aes(
# x = PC1,
# y = PC2,
# label = task ,
# fill = cluster
# ),
# nudge_y = 0.1,
# size = 2
# )
theme_light(base_size = 24)
p # show the plot

ggsave(plot = p, filename = '../task-map.png')
Saving 28 x 10 in image
Task Map Image Highlighting Specific Subsets (for Illustrative
Purposes)
# An illustrative set to display
display_set <- c('Writing story',
'Advertisement writing',
'Desert survival',
'NASA Moon survival',
'Ultimatum game (various versions)',
'Dictator game and its variants',
'Prisoner\'s Dilemma (various versions)',
'9 Dot Problem',
'Word construction from a subset of letters',
'Typing game',
'Ravens Matrices',
'Euclidean traveling salesperson'
)
# A set of the tasks that are most different
max_diff_set <- c('Shopping plan',
'Minimal Group Paradigm (study diversity)',
'9 Dot Problem',
'Whac-A-Mole',
'Bullard Houses',
'Putting food into categories',
'Checkers',
'Reproducing arts',
'Allocating resources to programs',
'Image rating',
'Arithmetic problem 2')
# A set of tasks that are the most similar
min_diff_set <- c('Arithmetic problem 1',
'Euclidean traveling salesperson',
'Abstract grid task',
'Mastermind',
'Logic Problem',
'Guessing the correlation',
'Random dot motion',
'Letters-to-numbers problems (cryptography)',
'Computer maze',
'Recall images',
'Recall stories')
# A set of tasks that illustrates opportunities to add new tasks
display_limitations_set <- c('Recall word lists',
'Hidden figures in a picture (Recall Task)',
'Recall images',
'Recall stories',
'Recall videos',
'Writing story',
'Advertisement writing')
graph_illustrative_plots <- function(displayset, filename){
p <- combined_data %>%
ggplot(aes(
x = PC1,
y = PC2,
#label = task,
#fill = cluster
)) + geom_point(aes(size = 4)) +
#geom_point(aes(color = cluster, size = 4)) +
#highlights only the ones in the selected set
geom_label(
data = subset(combined_data, task %in% displayset),
aes(
x = PC1,
y = PC2,
label = task
),
nudge_y = 0.15,
size = 4
) +
geom_point(data = subset(combined_data, task %in% displayset), aes(size = 4),
color = "firebrick1") +
theme_minimal(base_size = 18) + theme(legend.position = "none")
p
ggsave(plot = p, filename = filename, width = 14, height = 5)
}
#graph_illustrative_plots(display_limitations_set, '../images/task-map_with_new_task_opportunities_highlighted.png')
#graph_illustrative_plots(max_diff_set, '../images/task-map_with_max_diff_highlighted.png')
graph_illustrative_plots(min_diff_set, '../images/task-map_with_min_diff_highlighted.png')
Create a cool 3D version
plot_ly(
x = combined_data$PC1,
y = combined_data$PC2,
z = combined_data$PC3,
type = "scatter3d",
mode = "markers", # can use mode = "text"
text = combined_data$task ,
color = combined_data$cluster
)
Create synthetic dependent variable based on the clusters
tasks_with_dv <- subset(combined_data, task %in% max_diff_set) %>%
mutate(
synergy = as.factor(ifelse(cluster == 1 | cluster == 2, 1, 0))
)
combined_data <- combined_data %>%
mutate(
synergy = as.factor(ifelse(cluster == 1 | cluster == 2, 1, 0))
)
Fitting and Visualizing Models for the Task Map.
x <- combined_data %>% select(PC1, PC2, synergy, task)
train <- tasks_with_dv %>% select(PC1, PC2, synergy, task)
model <- train %>% svm(synergy ~ PC1 + PC2, data = ., kernel = "linear")
svmplot <- decisionplot(model, x, class_var = "synergy") +
geom_point(data = train, aes(x = PC1, y = PC2, shape = synergy), color = "darkolivegreen2", show.legend = F) +
geom_label(data = train, aes(label = task ), nudge_y = 0.1, size = 3) +
labs(title = "SVM (Linear Kernel)") +
theme_minimal(base_size = 12)
svmplot
ggsave('svmplot_synthetic_data.png', width = 12, height = 5)

model <- train %>% knn3(synergy ~ PC1 + PC2, data = ., k = 1)
knnplot <- decisionplot(model, x, class_var = "synergy") +
geom_point(data = train, aes(x = PC1, y = PC2, shape = synergy), color = "darkolivegreen2", show.legend = F) +
geom_label(data = train, aes(label = task ), nudge_y = 0.1, size = 3) +
labs(title = "kNN (1 neighbor)") +
theme_minimal(base_size = 12)
knnplot
ggsave('knnplot_synthetic_data.png', width = 12, height = 5)

---
title: "R Notebook for Paper-Related Visualizations"
output: html_notebook
---

This notebook contains the key visualizations for the Task Mapping paper.

```{r}
library(factoextra)
library(NbClust)
library(cluster)
library(plotly)
library(ggplot2)
library(caret) #for knn
library(e1071) #for svm
library(dplyr)
library(tidyverse)
```

Very useful decision boundary plotting code from: https://mhahsler.github.io/Introduction_to_Data_Mining_R_Examples/book/classification-alternative-techniques.html#k-nearest-neighbors
```{r decisionplot}

# Plot a fitted classifier's decision regions over two features.
#
# Arguments:
#   model        fitted model object that has a predict() method.
#   data         data frame holding the class column plus the features; the
#                first two columns that remain after dropping `class_var`
#                are used as the x and y axes.
#   class_var    name (single string) of the class column in `data`.
#   predict_type two `type` values handed to predict(): the first is used to
#                obtain class labels, the second to obtain class
#                probabilities/scores.
#   resolution   number of grid points per axis. The default corresponds to
#                75 dpi for an image rendered 5 inches wide.
#
# Returns a ggplot object: a raster of predicted classes (alpha encodes the
# margin between the best and second-best class when probabilities are
# available), class-boundary contours, and the observations in `data`.
# The subtitle reads "Training accuracy", but the value is the accuracy of
# `model` on whatever `data` is passed in.
decisionplot <- function(model, data, class_var,
  predict_type = c("class", "prob"), resolution = 5 * 75) {
  stopifnot(
    is.character(class_var),
    length(class_var) == 1L,
    class_var %in% colnames(data)
  )

  y <- data[[class_var]]
  # levels(y) drives every factor() call below, so make sure y has levels.
  if (!is.factor(y)) y <- factor(y)
  x <- data %>% dplyr::select(-all_of(class_var))
  stopifnot(ncol(x) >= 2L)

  # The two plotted features. The grid AND the overlaid points both use these
  # names, so they stay aligned even if `class_var` is one of the first two
  # columns of `data`.
  feat <- colnames(x)[1:2]

  # resubstitution accuracy
  prediction <- predict(model, x, type = predict_type[1])
  # some predict methods return a list (LDA, per the upstream author)
  if (is.list(prediction)) prediction <- prediction$class
  prediction <- factor(prediction, levels = levels(y))

  cm <- confusionMatrix(data = prediction, reference = y)
  acc <- cm$overall["Accuracy"]

  # evaluate model on a grid spanning the observed range of both features
  r <- vapply(x[, feat], range, numeric(2), na.rm = TRUE)
  xs <- seq(r[1, 1], r[2, 1], length.out = resolution)
  ys <- seq(r[1, 2], r[2, 2], length.out = resolution)
  g <- cbind(rep(xs, each = resolution), rep(ys, times = resolution))
  colnames(g) <- feat
  g <- as_tibble(g)

  ### guess how to get class labels from predict
  ### (unfortunately not very consistent between models)
  cl <- predict(model, g, type = predict_type[1])

  # Always bind `prob` locally: if the probability call fails we must not
  # fall through to some unrelated `prob` in an enclosing environment.
  prob <- NULL
  if (is.list(cl)) {
    prob <- cl$posterior
    cl <- cl$class
  } else {
    prob <- tryCatch(
      predict(model, g, type = predict_type[2]),
      error = function(e) NULL
    )
  }

  # we visualize the difference in probability/score between the
  # winning class and the second best class.
  # don't use probability if predict for the classifier does not support it
  # (then every cell is drawn fully opaque).
  max_prob <- 1
  if (length(dim(prob)) == 2L && ncol(prob) >= 2L) {
    max_prob <- tryCatch({
      ranked <- t(apply(prob, MARGIN = 1, sort, decreasing = TRUE))
      ranked[, 1] - ranked[, 2]
    }, error = function(e) 1)
  }

  cl <- factor(cl, levels = levels(y))

  g <- g %>% add_column(prediction = cl, probability = max_prob)

  ggplot(g, mapping = aes(
    x = .data[[feat[1]]],
    y = .data[[feat[2]]])) +
    geom_raster(mapping = aes(fill = prediction, alpha = probability)) +
    geom_contour(mapping = aes(z = as.numeric(prediction)),
      bins = length(levels(cl)), size = .5, color = "black") +
    geom_point(data = data, mapping = aes(
      x = .data[[feat[1]]],
      y = .data[[feat[2]]],
      shape = .data[[class_var]]), alpha = .7) +
    scale_alpha_continuous(range = c(0, 1), limits = c(0, 1), guide = "none") +
    # Set the labels explicitly so they show the column names rather than the
    # `.data[[...]]` expressions.
    labs(x = feat[1], y = feat[2], shape = class_var,
         subtitle = paste("Training accuracy:", round(acc, 2)))
}
```

# Load the Data
```{r}
# Load the task map from its CSV file (relative path).
task_map <- "../task_map.csv" %>% read_csv()
```

# Plot the Task Map and other Related Images

Draw the task map using PCA & clustering

First, run the PCA
```{r, fig.width=16, fig.height=5}
# Seed once, up front. The k-means based steps below all draw from the same
# RNG stream, so their order must not change: reordering them would alter the
# final clustering (and the arbitrary cluster numbers later code refers to).
set.seed(1)

# PCA on every column except the task name. Columns are centred; `scale.` is
# left at prcomp's default.
pca <- task_map %>% #select(-continuous_questions) %>%
  select(-task) %>%
  prcomp(center = TRUE)

# get optimal number of clusters -- "silhouette" method
fviz_nbclust(x = pca$x, FUNcluster = stats::kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette method")

# get optimal number of clusters -- NbClust diagnostics
NbClust(data = pca$x, distance = "euclidean",
        min.nc = 2, max.nc = 15, method = "kmeans")

# NOTE(review): k = 3 is hard-coded rather than taken from the diagnostics
# above -- confirm this is intentional.
kmeans_output <- pca$x %>%
  kmeans(centers = 3, nstart = 100)

# Original columns + all principal-component scores + cluster label. The
# cluster column is named directly instead of renaming a deparsed expression.
combined_data <- cbind(task_map,
                       pca$x,
                       cluster = factor(kmeans_output$cluster))

# Scree plot of the PCA
fviz_eig(pca)
```

Standard Task Map Image with All Labels
```{r, fig.width=14, fig.height=5}
# Full task map: every task is drawn at its (PC1, PC2) position as a point
# plus a text label; the label fill encodes the k-means cluster.
p <- ggplot(
  data = combined_data,
  mapping = aes(x = PC1, y = PC2, label = task, fill = cluster)
) +
  geom_point() +
  geom_label(nudge_y = 0.1, size = 4) +
  theme_light(base_size = 24)

# Disabled alternative, kept for reference: label only selected tasks by
# adding a layer such as
#   geom_label(
#     data = subset(combined_data,
#                   task %in% c("NASA Moon survival", "Desert survival")),
#     aes(x = PC1, y = PC2, label = task, fill = cluster),
#     nudge_y = 0.1, size = 2
#   )

p # show the plot

# No width/height is given, so ggsave() uses the current graphics device size.
ggsave(filename = "../task-map.png", plot = p)
```

Task Map Image Highlighting Specific Subsets (for Illustrative Purposes)
```{r, fig.width=14, fig.height=5}
# Task-name subsets used to highlight points on the task map. Names are
# matched against the `task` column, so spelling must agree exactly.

# An illustrative set to display
display_set <- c(
  "Writing story",
  "Advertisement writing",
  "Desert survival",
  "NASA Moon survival",
  "Ultimatum game (various versions)",
  "Dictator game and its variants",
  "Prisoner's Dilemma (various versions)",
  "9 Dot Problem",
  "Word construction from a subset of letters",
  "Typing game",
  "Ravens Matrices",
  "Euclidean traveling salesperson"
)

# A set of the tasks that are most different
max_diff_set <- c(
  "Shopping plan",
  "Minimal Group Paradigm (study diversity)",
  "9 Dot Problem",
  "Whac-A-Mole",
  "Bullard Houses",
  "Putting food into categories",
  "Checkers",
  "Reproducing arts",
  "Allocating resources to programs",
  "Image rating",
  "Arithmetic problem 2"
)

# A set of tasks that are the most similar
min_diff_set <- c(
  "Arithmetic problem 1",
  "Euclidean traveling salesperson",
  "Abstract grid task",
  "Mastermind",
  "Logic Problem",
  "Guessing the correlation",
  "Random dot motion",
  "Letters-to-numbers problems (cryptography)",
  "Computer maze",
  "Recall images",
  "Recall stories"
)

# A set of tasks that illustrates opportunities to add new tasks
display_limitations_set <- c(
  "Recall word lists",
  "Hidden figures in a picture (Recall Task)",
  "Recall images",
  "Recall stories",
  "Recall videos",
  "Writing story",
  "Advertisement writing"
)


# Draw the task map with one subset of tasks highlighted, display it, and save
# it to disk.
#
#   displayset  character vector of task names (matched against the `task`
#               column of the global `combined_data`) to draw in red and label.
#   filename    path of the image file to write (14 x 5 in).
#
# Reads the global `combined_data`. Returns whatever ggsave() returns.
graph_illustrative_plots <- function(displayset, filename){
  # Rows to highlight; computed once and shared by the point and label layers.
  highlighted <- subset(combined_data, task %in% displayset)

  p <- combined_data %>%
    ggplot(aes(
      x = PC1,
      y = PC2
      #label = task,
      #fill = cluster
    )) +
    # NOTE(review): `size = 4` is mapped inside aes(), so it goes through the
    # size scale instead of being a literal point size (its legend is hidden
    # below). Left as-is to keep the published figures unchanged.
    geom_point(aes(size = 4)) +
    #geom_point(aes(color = cluster, size = 4)) +
    # highlights only the ones in the selected set
    geom_point(data = highlighted, aes(size = 4),
               color = "firebrick1") +
    geom_label(
      data = highlighted,
      aes(
        x = PC1,
        y = PC2,
        label = task
      ),
      nudge_y = 0.15,
      size = 4
    ) +
    theme_minimal(base_size = 18) + theme(legend.position = "none")

  # A bare `p` is not auto-printed inside a function, so print explicitly to
  # make the figure appear in the notebook.
  print(p)

  ggsave(plot = p, filename = filename, width = 14, height = 5)
}
```

```{r, fig.width=14, fig.height=5}
# Write one highlighted task map per task subset.
graph_illustrative_plots(
  displayset = display_limitations_set,
  filename = "../images/task-map_with_new_task_opportunities_highlighted.png"
)

graph_illustrative_plots(
  displayset = max_diff_set,
  filename = "../images/task-map_with_max_diff_highlighted.png"
)

# Currently disabled: the "most similar" subset.
# graph_illustrative_plots(min_diff_set, '../images/task-map_with_min_diff_highlighted.png')
```

Create a cool 3D version
```{r}
# Interactive 3D scatter of the tasks on the first three principal components;
# marker colour follows the cluster and the task name is attached as text.
plot_ly(
  type = "scatter3d",
  mode = "markers", # can use mode = "text"
  x = combined_data[["PC1"]],
  y = combined_data[["PC2"]],
  z = combined_data[["PC3"]],
  text = combined_data[["task"]],
  color = combined_data[["cluster"]]
)
```

Create synthetic dependent variable based on the clusters
```{r}
# Add the synthetic binary outcome `synergy`: 1 for tasks in k-means cluster
# 1 or 2, otherwise 0, stored as a factor.
# NOTE(review): k-means cluster numbers are arbitrary labels, so which tasks
# get synergy = 1 depends on the particular clustering run -- confirm intended.
add_synergy <- function(df) {
  df %>%
    mutate(synergy = as.factor(ifelse(cluster == 1 | cluster == 2, 1, 0)))
}

# Labelled subset restricted to the tasks listed in max_diff_set ...
tasks_with_dv <- add_synergy(subset(combined_data, task %in% max_diff_set))

# ... and the same label for every task.
combined_data <- add_synergy(combined_data)
```

# Fitting and Visualizing Models for the Task Map.

```{r}
# `x`: all tasks (used to draw the decision plot and compute its accuracy).
# `train`: only the labelled subset in tasks_with_dv (used to fit the models).
x <- combined_data %>% select(PC1, PC2, synergy, task)
train <- tasks_with_dv %>% select(PC1, PC2, synergy, task)

# Linear-kernel SVM on the first two principal components.
model <- svm(synergy ~ PC1 + PC2, data = train, kernel = "linear")

# Decision regions over all tasks, with the training tasks re-drawn in green
# and labelled by name.
svmplot <- decisionplot(model, x, class_var = "synergy") +
  geom_point(data = train, aes(x = PC1, y = PC2, shape = synergy),
             color = "darkolivegreen2", show.legend = FALSE) +
  geom_label(data = train, aes(label = task), nudge_y = 0.1, size = 3) +
  labs(title = "SVM (Linear Kernel)") +
  theme_minimal(base_size = 12)

svmplot

# Pass the plot explicitly rather than relying on ggsave()'s last-plot default.
ggsave('svmplot_synthetic_data.png', plot = svmplot, width = 12, height = 5)
```

```{r}
# 1-nearest-neighbour classifier on the first two principal components.
# Uses `x` and `train`, which are not defined in this chunk and must already
# exist in the session.
model <- knn3(synergy ~ PC1 + PC2, data = train, k = 1)

# Decision regions over all tasks, with the training tasks re-drawn in green
# and labelled by name.
knnplot <- decisionplot(model, x, class_var = "synergy") +
  geom_point(data = train, aes(x = PC1, y = PC2, shape = synergy),
             color = "darkolivegreen2", show.legend = FALSE) +
  geom_label(data = train, aes(label = task), nudge_y = 0.1, size = 3) +
  labs(title = "kNN (1 neighbor)") +
  theme_minimal(base_size = 12)

knnplot

# Pass the plot explicitly rather than relying on ggsave()'s last-plot default.
ggsave('knnplot_synthetic_data.png', plot = knnplot, width = 12, height = 5)
```